'''
* This is the project for Brtc LlmOps Platform
* @Author Leon-liao <liaosiliang@alltman.com>
* @Description //TODO 
* @File: jieba_service.py
* @Time: 2025/9/26
* @All Rights Reserved By Brtc
'''
from dataclasses import dataclass
import jieba
from jieba.analyse import default_tfidf
from injector import inject
from internal.entity.jieba_entity import STOPWORD_SET


@inject
@dataclass
class JiebaService:
    """Jieba word-segmentation service used to extract keywords from text.

    The service swaps the stop-word set of jieba's module-level TF-IDF
    extractor (``jieba.analyse.default_tfidf``) for the project-defined
    ``STOPWORD_SET``. That extractor is a process-wide object, so the change
    affects every caller of ``jieba.analyse.extract_tags`` in the process.
    """

    def __init__(self):
        """Construct the service and install the project stop-word set."""
        self._install_stop_words()

    @staticmethod
    def _install_stop_words() -> None:
        """Point jieba's default TF-IDF extractor at ``STOPWORD_SET``.

        Idempotent: once the set is installed, later calls do nothing.
        """
        if default_tfidf.stop_words is not STOPWORD_SET:
            default_tfidf.stop_words = STOPWORD_SET

    @classmethod
    def extract_keywords(cls, text: str, max_keyword_pre_chunk: int = 10) -> list[str]:
        """Extract a list of keywords from ``text``.

        Args:
            text: The text to extract keywords from.
            max_keyword_pre_chunk: Passed to jieba as ``topK``, the maximum
                number of keywords to return. Defaults to 10.
                NOTE(review): jieba appears to treat a falsy ``topK``
                (e.g. 0) as "no limit" rather than "no keywords" -- confirm
                before relying on 0 here.

        Returns:
            The keywords produced by ``jieba.analyse.extract_tags``.
        """
        # This is a classmethod, so it can be called before any instance has
        # been constructed. Install the stop words here as well, so filtering
        # does not depend on __init__ having run first.
        cls._install_stop_words()
        return jieba.analyse.extract_tags(
            sentence=text,
            topK=max_keyword_pre_chunk,
        )
